#delimit ;
*From here on every command and every comment must end with a semicolon;
clear all;
*NOTE(review): presumably this turns off the more pause in the output - confirm what the value 1 means for set more;
set more 1;
set maxvar 32000;
version 12;
*base random seed - the bootstrap section sets the seed to this value plus the replication number;
local seed = 735;

*******************************************************************************;
*directories;
*******************************************************************************;

*Define the main directory;
*NOTE(review): left empty here, so presumably it has to be filled in with the project root before running - every directory global below is built from it;
global upper_dir = "";

*folder the script changes into to read and write its intermediate .dta files;
global scratch_dir = "$upper_dir\scratch";

*folder that receives the exported figures;
global paper_dir = "$upper_dir\tables_figures";

*NOTE(review): not referenced in the visible part of the script - presumably used by data_prep.do, confirm;
global nlsy_data_dir = "$upper_dir\data";

*folder that holds data_prep.do;
global dofile_dir = "$upper_dir\stata_code";


*******************************************************************************;
*data prep;
*******************************************************************************;

*Switch: 1 reruns data_prep.do and overwrites data_prep.dta, 0 reuses the saved file;
local do_here = 0;
if `do_here' == 1 {;
	cd "$dofile_dir";
	do data_prep.do;

	cd "$scratch_dir";
	save data_prep.dta, replace;
};


******************************************************************************;
*open full dataset;
******************************************************************************;

cd "$scratch_dir";
use data_prep.dta;

*check for balanced panel: number the rows within each mother-child pair and tabulate the counter;
bysort mom_id_nlsy child_id_nlsy: gen temp = _n;
tabulate temp, missing;
drop temp;

******************************************************************************;
*general sample selection for agostinelli-wiswall
******************************************************************************;

*keep child-age observations from age 0 through age 15;
keep if inrange(child_age, 0, 15);

*a few observations have missing sex;
*NOTE(review): a system missing value compares greater than 0 and survives this filter - confirm that missing sex is stored as a non-positive code;
drop if child_sex <= 0;

******************************************************************************;
*sample selection for agostinelli-wiswall;
******************************************************************************;

*************************************************;
*edit some measures;
*************************************************;

*rescale family income to units of 10,000;
replace faminc = faminc/10000;

*censor from below at 5,000 - only strictly positive incomes are raised to the floor;
replace faminc = 0.5 if faminc > 0 & faminc < 0.5;

*censor from above at 200,000;
replace faminc = 20 if faminc > 20 & faminc != .;

*income in logs - incomes at or below zero become missing in this step;
replace faminc = ln(faminc);

*trim gestation length to the range 30 to 43;
replace gest_length = 30 if gest_length < 30;
replace gest_length = 43 if gest_length > 43 & gest_length != .;

*trim birth weight to the range 70 to 150;
replace weight_atbirth = 70 if weight_atbirth < 70;
replace weight_atbirth = 150 if weight_atbirth > 150 & weight_atbirth != .;


*************************************************;
*save full sample;
*************************************************;

cd "$scratch_dir";
save temp_sample.dta, replace;


cd "$scratch_dir";
use temp_sample.dta, clear;

*correlations among the behaviour and achievement measures, separately by child age;
by child_age, sort: correlate bpi* recog math comp ppvt;

*correlations among selected temperament and early development measures, separately by child age;
by child_age, sort: correlate tempE tempF tempG moto body;

******************************************************************************;
*measure labels
******************************************************************************;

*Lists of variable names, stored in locals and reused by the reshape and estimation steps below;
*Every comment in this section must end with a semicolon - otherwise it absorbs the command that follows it;

*child cognitive measures;
local child_cog_measures = 
"gest_length weight_atbirth moto body loca ppvt math recog comp digit";

*child non-cognitive measures;
local child_noncog_measures = 
"tempA tempB tempC tempD tempE tempF tempG tempH tempI tempJ bpiA bpiB bpiC bpiD bpiE";

*goods investments;
local goods_investments = 
"number_books number_soft_toys number_push_toys number_magazines
has_tape_cd has_musical_instrument has_newspaper
";

*time investments;
local time_investments = 
"often_house often_mom_reads often_eats_mom 
often_mom_talks often_museum often_praised often_positive
often_see_family_friends ";

*other investments;
local other_investments = 
"has_lessons attend_mus_perform";

*mom cognitive measures;
local mom_cog_measures = 
"asvab2 asvab3 asvab4 asvab5 asvab6 asvab8";

*mom non-cognitive measures;
local mom_noncog_measures = 
"se1 se2 se3 se4 se5 se6 se8 se9 se10 rotter1 rotter2 rotter3 rotter4";

*demeaned mother measures (suffix _dm, created inside the bootstrap loop) used as regressors when imputing missing predictions;
local controls_imputation = " asvab2_dm asvab3_dm asvab4_dm asvab5_dm asvab6_dm asvab8_dm se1_dm se2_dm se3_dm se4_dm se5_dm se6_dm se8_dm se9_dm se10_dm rotter1_dm rotter2_dm rotter3_dm rotter4_dm";



******************************************************************************;
*which investment measures are significantly positively related to skills at each age?;
******************************************************************************;

*Exploratory block, disabled while do_here is 0;
*For the chosen age t it regresses each of ppvt, recog, comp and math on one time-investment measure at a time;
local do_here = 0;
if `do_here' == 1 {;

cd "$scratch_dir";

local temp_list = "";

forvalues t = 12(1)12 {;

	display `t';

	*skill measures recorded two years after age t;
	use temp_sample.dta, clear;
	keep if child_age == `t' + 2;
	keep child_id_nlsy ppvt math recog comp;
	sort child_id_nlsy;
	save temp.dta, replace;

	*observations at age t, matched to the file saved above;
	*NOTE(review): the master data already holds ppvt math recog comp and this merge has no update option, so the master values are presumably kept and the regressions below use the age t measures - confirm the intent;
	use temp_sample.dta, clear;
	keep if child_age == `t';
	sort child_id_nlsy;
	merge child_id_nlsy using temp.dta;


	foreach inv in `time_investments' {;

		regress ppvt `inv';
		regress recog `inv';
		regress comp `inv';
		regress math `inv';

	};
};


*this comes from looking at t-stats by hand;

*age 5;
local time_investments5 = 
"often_house often_mom_reads often_museum";

*age 6;
local time_investments6 = 
"often_house often_mom_reads often_museum";

*age 7;
local time_investments7 = 
"often_mom_reads often_museum";

*age 8 - the first name was misspelled as often_mom_eats, the variable is often_eats_mom as in the time investment list;
local time_investments8 = 
"often_eats_mom often_museum often_praised often_positive";

*age 9;
local time_investments9 = 
"often_eats_mom often_museum often_praised often_positive";

*age 10;
local time_investments10 = 
"often_eats_mom often_museum often_praised often_positive";

*age 11;
local time_investments11 = 
"often_eats_mom often_museum often_praised often_positive";

*age 12;
local time_investments12 = 
"often_eats_mom often_museum often_praised often_positive";

*age 13;
local time_investments13 = 
"often_eats_mom often_museum often_praised often_positive";


};



*Builds temp_sample_wide.dta: one row per child, age-specific columns for the child measures and the per-child mean of each mother measure;
local do_here = 1;
if `do_here' == 1 {;

	******************************************************************************;
	*reshape wide by age;
	******************************************************************************;

	cd "$scratch_dir";
	use temp_sample.dta, clear;

	*variables that get one column per child age;
	local by_age_vars `child_cog_measures' `time_investments' faminc mom_school;

	keep child_id_nlsy mom_id_nlsy child_age `by_age_vars' child_sex child_race;
	reshape wide `by_age_vars', i(child_id_nlsy mom_id_nlsy) j(child_age);
	sort child_id_nlsy;
	save temp.dta, replace;

	*per-child means of the mother measures, education and wage_children;
	use temp_sample.dta, clear;
	collapse (mean) `mom_cog_measures' `mom_noncog_measures' education wage_children, by(child_id_nlsy);
	sort child_id_nlsy;
	merge child_id_nlsy using temp.dta;
	drop _merge;



	*******************************************************************************;
	*save wide dataset;
	*******************************************************************************;

	compress;
	sort child_id_nlsy;
	cd "$scratch_dir";
	save temp_sample_wide.dta, replace;

};



*******************************************************************************;
*Here we define what model to estimate and what estimator to use;

*Two different models;
* restricted_model = 0 (General Model): the PIAT math location and loading at later ages are set equal to the age 5 values and log TFP is estimated;
* restricted_model = 1 (Restricted Model): log TFP is set to 0 and the location and loading of the outcome measure come from the production function regression;

*In the empirical part of the paper we estimate the general model.
*To replicate the results in the paper, restricted_model should be set to 0.;

local restricted_model = 0;

*******************************************************************************;
*Two different estimators: allowing for measurement error correction or not;
* do_ols = 0 Measurement Error Corrected: regressors are predictions from regressions on the other measures;
* do_ols = 1 Not Corrected: the rescaled measures enter the regressions directly;
local do_ols = 0;

*******************************************************************************;
*open wide dataset;
*******************************************************************************;


*load the wide file and store a copy that every bootstrap replication reloads;
cd "$scratch_dir";
use temp_sample_wide.dta, clear;

save original_data.dta, replace;


*number of mothers: the largest group index equals the count of distinct mother ids;
egen temp = group(mom_id_nlsy);
summarize temp;
local numb_mothers = r(max);
display `numb_mothers';


*indicators for a score of exactly 0 (suffix lb) or exactly 84 (suffix ub) at ages 5, 7, 9, 11 and 13;
*left missing when the score itself is missing;
*NOTE(review): 0 and 84 are presumably the bounds of the score range - confirm;
foreach score in math recog comp {;
	foreach a of numlist 5(2)13 {;
		gen d_`score'_lb_`a' = (`score'`a' == 0) if `score'`a' != .;
		gen d_`score'_ub_`a' = (`score'`a' == 84) if `score'`a' != .;
	};
};

summarize *_lb*;
summarize *_ub*;


*******************************************************************************;
*******************************************************************************;
*******************************************************************************;
*BEGIN ESTIMATION AND BOOTSTRAP ALGORITHM;
*******************************************************************************;
*******************************************************************************;
*******************************************************************************;
set output proc;

*do estimates and bootstrap;
local do_here = 1;
if `do_here' == 1 {;
*blank dataset;
clear;
set obs 1;
gen blah = 1;
save temp_collect.dta, replace;
clear;

forvalues b = 0(1)100{;

set output proc;
display `b';

*open original data;
use original_data.dta, clear;

*b = 0 (original data), then produce estimates;

*bootstrap samples;
if `b' > 0 {;
local seed_here = `seed' + `b';
set seed `seed_here';

*cluster bootstrap;
bsample `numb_mothers', cluster(mom_id_nlsy);

*standard bootstrap;
*bsample _N;

};

*suppress output;
set output error;



*demean every mother measure within the current sample - the _dm versions are the imputation controls;
*the two lists hold the same names, in the same order, as the explicit list they replace;
foreach meas in `mom_cog_measures' `mom_noncog_measures' {;
	quietly summarize `meas';
	gen `meas'_dm = `meas' - r(mean);
};





*******************************************************************************;
*save dataset for merging below;
*******************************************************************************;

gen id = 1;
sort id;
save temp.dta, replace;


*******************************************************************************;
*factor loadings at period 5
*******************************************************************************;


gen temp1 = .;
gen temp2 = .;


***************************;
*child's cognitive
***************************;

gen lambda_math5 = 1;

correlate comp5 recog5, covariance;
replace temp1 = r(cov_12);
correlate math5 recog5, covariance;
replace temp2 = r(cov_12);

gen lambda_comp5 = temp1/temp2;

correlate comp5 recog5, covariance;
replace temp1 = r(cov_12);
correlate math5 comp5, covariance;
replace temp2 = r(cov_12);

gen lambda_recog5 = temp1/temp2;


***************************;
*mother cognitive
***************************;

local meas_norm = "asvab2";
local meas_list = "`mom_cog_measures'";

gen lambda_`meas_norm' = 1;

local index = 0;
foreach Z2 in `meas_list' {;

if "`Z2'" ~= "`meas_norm'" {;
gen lambda_`Z2' = .;
};

foreach Z3 in `meas_list' {;

if "`Z2'" ~= "`meas_norm'" & "`Z2'" ~= "`Z3'" & "`Z3'" ~= "`meas_norm'" {;

local index = `index' + 1;

correlate `Z2' `Z3', covariance;
replace temp1 = r(cov_12);
correlate `meas_norm' `Z3', covariance;
replace temp2 = r(cov_12);

replace lambda_`Z2' = temp1 / temp2 in `index';

};
};
};


***************************;
*mom non-cognitive measures;
***************************;

local meas_norm = "se1";
local meas_list = "`mom_noncog_measures'";

gen lambda_`meas_norm' = 1;

local index = 0;
foreach Z2 in `meas_list' {;

if "`Z2'" ~= "`meas_norm'" {;
gen lambda_`Z2' = .;
};

foreach Z3 in `meas_list' {;

if "`Z2'" ~= "`meas_norm'" & "`Z2'" ~= "`Z3'" & "`Z3'" ~= "`meas_norm'" {;

local index = `index' + 1;

correlate `Z2' `Z3', covariance;
replace temp1 = r(cov_12);
correlate `meas_norm' `Z3', covariance;
replace temp2 = r(cov_12);

replace lambda_`Z2' = temp1 / temp2 in `index';

};
};
};





**********************************************;
*lambda estimate is average over all possible lambda estimates;
**********************************************;

collapse lambda*;
gen id = 1;
sort id;
merge id using temp.dta;
drop _merge;


*******************************************************************************;
*create tilde measures for initial factors;
*******************************************************************************;

local cog_list5 = "math5 recog5 comp5";

foreach X in `mom_cog_measures' `mom_noncog_measures' `cog_list5' {;
sum `X';
gen mu_`X' = r(mean);
gen `X'_tilde = (`X' - mu_`X') / lambda_`X';
};

gen faminc5_tilde = faminc5;

*******************************************************************************;
*save dataset for merging below;
*******************************************************************************;

sort id;
cd "$scratch_dir";
save temp.dta, replace;

*******************************************************************************;
*covariance estimates;
*******************************************************************************;

local log_inc_meas = "faminc5";


*var log income;
sum `log_inc_meas';
gen var_`log_inc_meas' = r(Var);


*var child cognitive;
local index = 0;
gen var_child_cog = .;
foreach Z1 in `cog_list5' {;
foreach Z2 in `cog_list5' {;

if "`Z1'" ~= "`Z2'" {;

local index = `index' + 1;
correlate `Z1'_tilde `Z2'_tilde, covariance;
replace var_child_cog = r(cov_12) in `index';

};
};
};




*var mother cognitive measures;
local index = 0;
gen var_mom_cog = .;
foreach Z1 in `mom_cog_measures' {;
foreach Z2 in `mom_cog_measures' {;

if "`Z1'" ~= "`Z2'" {;

local index = `index' + 1;
correlate `Z1'_tilde `Z2'_tilde, covariance;
replace var_mom_cog = r(cov_12) in `index';

};
};
};

*var mother non-cognitive measures;
local index = 0;
gen var_mom_noncog = .;
foreach Z1 in `mom_noncog_measures' {;
foreach Z2 in `mom_noncog_measures' {;

if "`Z1'" ~= "`Z2'" {;

local index = `index' + 1;
correlate `Z1'_tilde `Z2'_tilde, covariance;
replace var_mom_noncog = r(cov_12) in `index';

};
};
};

*covariance child cog and mom cog;
local index = 0;
gen cov_child_cog_mom_cog = .;
gen corr_child_cog_mom_cog = .;
foreach Z1 in `cog_list5' {;
foreach Z2 in `mom_cog_measures' {;

local index = `index' + 1;
correlate `Z1'_tilde `Z2'_tilde, covariance;
replace cov_child_cog_mom_cog = r(cov_12) in `index';


};
};



*covariance child cog and mom non cog;
local index = 0;
gen cov_child_cog_mom_noncog = .;
gen corr_child_cog_mom_noncog = .;


foreach Z1 in `cog_list5' {;
foreach Z2 in `mom_noncog_measures' {;

local index = `index' + 1;
correlate `Z1'_tilde `Z2'_tilde, covariance;
replace cov_child_cog_mom_noncog = r(cov_12) in `index';

};
};

*covariance mom cog and mom non cog;
local index = 0;
gen cov_mom_cog_mom_noncog = .;
gen corr_mom_cog_mom_noncog = .;


foreach Z1 in `mom_cog_measures' {;
foreach Z2 in `mom_noncog_measures' {;

local index = `index' + 1;
correlate `Z1'_tilde `Z2'_tilde, covariance;
replace cov_mom_cog_mom_noncog = r(cov_12) in `index';

};
};

*covariance child cog and log income;
local index = 0;
gen cov_child_cog_log_inc = .;
gen corr_child_cog_log_inc = .;


foreach Z1 in `cog_list5' {;
foreach Z2 in `log_inc_meas' {;

local index = `index' + 1;
correlate `Z1'_tilde `Z2'_tilde, covariance;
replace cov_child_cog_log_inc = r(cov_12) in `index';

};
};

*covariance mom cog and log income;
local index = 0;
gen cov_mom_cog_log_inc = .;
gen corr_mom_cog_log_inc = .;


foreach Z1 in `mom_cog_measures' {;
foreach Z2 in `log_inc_meas' {;

local index = `index' + 1;
correlate `Z1'_tilde `Z2'_tilde, covariance;
replace cov_mom_cog_log_inc = r(cov_12) in `index';


};
};


*covariance mom noncog and log income;
local index = 0;
gen cov_mom_noncog_log_inc = .;
gen corr_mom_noncog_log_inc = .;



foreach Z1 in `mom_noncog_measures' {;
foreach Z2 in `log_inc_meas' {;

local index = `index' + 1;
correlate `Z1'_tilde `Z2'_tilde, covariance;
replace cov_mom_noncog_log_inc = r(cov_12) in `index';

};
};



collapse var* cov* corr*;
      
replace corr_child_cog_mom_cog = cov_child_cog_mom_cog/(sqrt(var_child_cog)*sqrt(var_mom_cog));
replace corr_child_cog_mom_noncog = cov_child_cog_mom_noncog/(sqrt(var_child_cog)*sqrt(var_mom_noncog));
replace corr_mom_cog_mom_noncog = cov_mom_cog_mom_noncog/(sqrt(var_mom_cog)*sqrt(var_mom_noncog));
replace corr_child_cog_log_inc = cov_child_cog_log_inc/(sqrt(var_child_cog)*sqrt(var_faminc5));
replace corr_mom_cog_log_inc = cov_mom_cog_log_inc/(sqrt(var_mom_cog)*sqrt(var_faminc5));
replace corr_mom_noncog_log_inc = cov_mom_noncog_log_inc/(sqrt(var_mom_noncog)*sqrt(var_faminc5));


collapse var* cov* corr*;
gen id = 1;
sort id;
merge id using temp.dta;
drop _merge;


*******************************************************************************;
*save for merging below;
*******************************************************************************;

sort id;
save temp.dta, replace;


*******************************************************************************;
*income process
*******************************************************************************;

*Income process: regress log family income on its value two years earlier, ages 5 to 13;
*The block stores the intercept, the slope, the residual standard deviation and the mean log income at age 5;

*keep only the income columns - one per age plus faminc5_tilde;
keep faminc*;
egen child_id_here = seq(); *need new id for bootstrap;

*one row per child and age - faminc5_tilde stays as a column that is constant within child;
reshape long faminc@, i(child_id_here) j(child_age);

keep if child_age >= 5 & child_age <= 13;

tsset child_id_here child_age;
*order by age inside each child explicitly so the lag does not depend on the sort order left by an earlier command;
bysort child_id_here (child_age): gen faminc_tm2 = faminc[_n-2];
*gen time_trend = child_age + 2;

*with time trend;
*regress faminc faminc_tm2 time_trend;

*without time trend;
regress faminc faminc_tm2;
gen inc_beta_const = _b[_cons];
gen inc_beta_tm2 = _b[faminc_tm2];
*gen inc_beta_trend = _b[time_trend]; 


*prediction and residual;
predict faminc_predict if e(sample);
gen resid = faminc - faminc_predict;
sum resid;
gen income_shock_std = r(sd);

*mean log income at age 5;
*the age-specific column faminc5 no longer exists after the reshape - the original name only ran because abbreviation resolved it to faminc5_tilde;
*the long variable restricted to age 5 holds the same values and does not depend on abbreviation;
sum faminc if child_age == 5;
gen mean_income_age5 = r(mean);

*collapse to a single row of estimates and attach it to the working data through the constant id;
keep in 1;
keep inc_beta_const inc_beta_tm2 income_shock_std mean_income_age5;
gen id = 1;
sort id;
merge id using temp.dta;
drop _merge;


*******************************************************************************;
*create missing values, dummies
*******************************************************************************;

forvalues t = 0(1)15 {;
foreach X in `child_cog_measures' `time_investments' faminc {;

gen `X'`t'_m = `X'`t';

};
};

foreach X in `mom_cog_measures' `mom_noncog_measures' {;

gen `X'_m = `X';

};


*******************************************************************************;
*create tilde measures for mother's measures when missing
*******************************************************************************;

foreach X in `mom_cog_measures' `mom_noncog_measures' `cog_list5' {;

gen `X'_m_tilde = (`X'_m - mu_`X') / lambda_`X';

};

*******************************************************************************;
*In the general model ( restricted_model= 0) 
* we assume cognitive measures (PIAT MATH) are age invariant;
*******************************************************************************;

if `restricted_model'==0 {;

*local cog_list = "math recog comp";

local cog_list = "math";

foreach X in `cog_list' {;
forvalues t = 7(2)13 {;
gen mu_`X'`t' = mu_`X'5;
gen lambda_`X'`t' = lambda_`X'5;
gen `X'`t'_m_tilde = (`X'`t'_m - mu_`X'`t') / lambda_`X'`t';
};
};


}; *end restricted_model ; 


sum lambda_math* mu_math*;


**********************************;
*Predict mother cognitive skills;
**********************************;

if `do_ols'==0{;
reg asvab2_m_tilde asvab3 asvab4 asvab5 asvab6 asvab8 ;
predict asvab2_m_tilde_hat if e(sample);
};

if `do_ols'==1{;
gen asvab2_m_tilde_hat = asvab2_m_tilde;
};

**********************************;
*Predict mother non-cognitive skills;
**********************************;
if `do_ols'==0{;
reg se1_m_tilde se2 se3 se4 se5 se6 se8 se9 se10 rotter1 rotter2 rotter3 rotter4;
predict se1_m_tilde_hat if e(sample);
};

if `do_ols'==1{;
gen se1_m_tilde_hat = se1_m_tilde;
};

*Predict child-skills;

forvalues t = 5(2)13 {;

if `do_ols'==0{;
reg math`t'_m_tilde  recog`t'  comp`t';
predict math`t'_m_tilde_hat if e(sample);

reg math`t'_m_tilde  recog`t'  ;
predict math`t'_m_tilde_hat2 if e(sample);
replace math`t'_m_tilde_hat = math`t'_m_tilde_hat2 if math`t'_m_tilde_hat==.;

reg math`t'_m_tilde  comp`t'  ;
predict math`t'_m_tilde_hat3 if e(sample);
replace math`t'_m_tilde_hat = math`t'_m_tilde_hat3 if math`t'_m_tilde_hat==.;




reg math`t'_m_tilde `controls_imputation';
predict math`t'_m_tilde_hat4, xb;
replace math`t'_m_tilde_hat = math`t'_m_tilde_hat4 if math`t'_m_tilde_hat==.;


drop math`t'_m_tilde_hat2 math`t'_m_tilde_hat3 math`t'_m_tilde_hat4;

};

if `do_ols'==1{;
gen math`t'_m_tilde_hat = math`t'_m_tilde;

reg math`t'_m_tilde `controls_imputation';
predict math`t'_m_tilde_hat2, xb;
replace math`t'_m_tilde_hat = math`t'_m_tilde_hat2 if math`t'_m_tilde_hat==.;

drop math`t'_m_tilde_hat2;

};



};




*******************************************************************************;
*investment function: age 5 output, age 5 inputs
*******************************************************************************;

forvalues t = 5(2)11 {;

*******************************************************************************;
*PART 1: estimate investment function;
*******************************************************************************;

local mom_cog_meas = "asvab2_m_tilde_hat";
local mom_noncog_meas = "se1_m_tilde_hat";


if `t' == 5 {;

local inv_meas = "often_mom_reads5";
local inv_meas2 = "often_museum5";
local skill_meas = "math5_m_tilde_hat";
local inc_meas = "faminc5_m";

};

if `t' == 7 {;

local inv_meas = "often_mom_reads7";
local inv_meas2 = "often_museum7";
local skill_meas = "math7_m_tilde_hat";
local inc_meas = "faminc7_m";

};


if `t' == 9 {;

local inv_meas = "often_museum9";
local inv_meas2 = "often_praised9";
local skill_meas = "math9_m_tilde_hat";
local inc_meas = "faminc9_m";

};


if `t' == 11 {;

local inv_meas = "often_praised11";
local inv_meas2 = "often_museum11";
local skill_meas = "math11_m_tilde_hat";
local inc_meas = "faminc11_m";
};



***********************************************;
*estimate main part of investment function
***********************************************;

reg `inv_meas' `inc_meas'  `skill_meas' `mom_cog_meas' `mom_noncog_meas', robust;


predict predict_`inv_meas' if e(sample);
gen resid_`inv_meas' = `inv_meas' - predict_`inv_meas';

gen beta1 = _b[`skill_meas'];
gen beta2 = _b[`inc_meas'];
gen beta3 = _b[`mom_cog_meas'];
gen beta4 = _b[`mom_noncog_meas'];
gen lambda_`inv_meas' = beta1 + beta2 + beta3 + beta4;
gen mu_`inv_meas' = _b[_cons];

forvalues j = 1(1)4 {;
gen alpha`j'_age`t'_1 = beta`j' / lambda_`inv_meas';
};

drop beta*;


**************************************;
*compute variance of investment shock;
**************************************;

reg `inv_meas2' `inc_meas'  `skill_meas' `mom_cog_meas' `mom_noncog_meas', robust;


gen beta1 = _b[`skill_meas'];
gen beta2 = _b[`inc_meas'];
gen beta3 = _b[`mom_cog_meas'];
gen beta4 = _b[`mom_noncog_meas'];
gen mu_`inv_meas2' = _b[_cons];
gen lambda_`inv_meas2' = beta1 + beta2 + beta3 + beta4;

forvalues j = 1(1)4 {;
gen alpha`j'_age`t'_2 = beta`j' / lambda_`inv_meas2';
};

forvalues j = 1(1)4 {;
gen alpha`j'_age`t' = ( alpha`j'_age`t'_1 + alpha`j'_age`t'_2)/2;
drop alpha`j'_age`t'_1 alpha`j'_age`t'_2;
};

drop beta*;

gen `inv_meas2'_tilde = (`inv_meas2' - mu_`inv_meas2')/ lambda_`inv_meas2';

gen resid_`inv_meas'_tilde = resid_`inv_meas' / lambda_`inv_meas';

correlate resid_`inv_meas'_tilde `inv_meas2'_tilde, covariance;

gen cov_inv_age`t' = r(cov_12);
gen std_inv_age`t' = sqrt(cov_inv_age`t');


*****************************************************************;
*compute tilde for investment for evolution of skills at age 7;
*****************************************************************;

foreach X in `inv_meas' `inv_meas2' {;
gen `X'_m_tilde = (`X'_m - mu_`X')  / lambda_`X';
};


*******************************************************************************;
*PART 2: production function: age 7 output, age 5 inputs;
*******************************************************************************;

if `t' == 5 {;

*Y variable;
local skill_meas_out = "math7";
local skill_meas_out2 = "recog7";
local skill_meas_out3 = "comp7";

*X variables;
*skills;
local skill_meas_in = "math5_m_tilde_hat";

*investments;
if `do_ols'==0{;
reg often_mom_reads`t'_m_tilde  often_museum`t'_m;
predict often_mom_reads`t'_m_tilde_hat if e(sample);
};

if `do_ols'==1{;
gen often_mom_reads`t'_m_tilde_hat = often_mom_reads`t'_m_tilde;
};


reg often_mom_reads`t'_m_tilde `controls_imputation';
predict often_mom_reads`t'_m_tilde_hat2, xb;
replace often_mom_reads`t'_m_tilde_hat = often_mom_reads`t'_m_tilde_hat2 if often_mom_reads`t'_m_tilde_hat==.;
drop often_mom_reads`t'_m_tilde_hat2;
local inv_meas_in = "often_mom_reads`t'_m_tilde_hat";


if `do_ols'==0{;
gen int_in = math5_m_tilde_hat*often_mom_reads5_m_tilde_hat;

*Z instruments;
gen int1 = recog5_m * often_museum5_m;
gen int2 = comp5_m * often_museum5_m;
reg int_in recog5_m comp5_m often_museum5_m int1 int2 ;
predict int_in_hat if e(sample);
};

if `do_ols'==1{;
gen int_in = math5_m_tilde*often_mom_reads5_m_tilde;
gen int_in_hat = int_in;
};

reg int_in  `controls_imputation';
predict int_in_hat2, xb;
replace int_in_hat = int_in_hat2 if int_in_hat==.;
local skill_inv_meas_in = "int_in_hat";


gen sample_`t' = ( math`t'!=. |  often_mom_reads5!=.);

};


if `t' == 7 {;

*Y variable;
local skill_meas_out = "math9";
local skill_meas_out2 = "recog9";
local skill_meas_out3 = "comp9";

*X variables;
local skill_meas_in = "math7_m_tilde_hat";

if `do_ols'==0{;
reg often_mom_reads`t'_m_tilde  often_museum`t'_m;
predict often_mom_reads`t'_m_tilde_hat if e(sample);
};

if `do_ols'==1{;
gen often_mom_reads`t'_m_tilde_hat = often_mom_reads`t'_m_tilde;
};


reg often_mom_reads`t'_m_tilde `controls_imputation';
predict often_mom_reads`t'_m_tilde_hat2, xb;
replace often_mom_reads`t'_m_tilde_hat = often_mom_reads`t'_m_tilde_hat2 if often_mom_reads`t'_m_tilde_hat==.;
drop often_mom_reads`t'_m_tilde_hat2;
local inv_meas_in = "often_mom_reads`t'_m_tilde_hat";



if `do_ols'==0{;

gen int_in = math7_m_tilde_hat*often_mom_reads7_m_tilde_hat;
*Z instruments;
gen int1 = recog7_m * often_museum7_m;
gen int2 = comp7_m * often_museum7_m;
reg int_in recog7_m comp7_m often_museum7_m int1 int2 ;
predict int_in_hat if e(sample);
};

if `do_ols'==1{;
gen int_in = math7_m_tilde*often_mom_reads7_m_tilde;
gen int_in_hat = int_in;

};

reg int_in  `controls_imputation';
predict int_in_hat2, xb;
replace int_in_hat = int_in_hat2 if int_in_hat==.;
local skill_inv_meas_in = "int_in_hat";


gen sample_`t' = ( math`t'!=. |  often_mom_reads`t'!=.);
};


if `t' == 9 {;

*Y variable;
local skill_meas_out = "math11";
local skill_meas_out2 = "recog11";
local skill_meas_out3 = "comp11";

*X variables;
local skill_meas_in = "math9_m_tilde_hat";

if `do_ols'==0{;
reg often_museum`t'_m_tilde often_praised9_m often_mom_reads9_m;
predict often_museum`t'_m_tilde_hat if e(sample);
};

if `do_ols'==1{;
gen often_museum`t'_m_tilde_hat = often_museum`t'_m_tilde;
};

reg often_museum`t'_m_tilde `controls_imputation';
predict often_museum`t'_m_tilde_hat2, xb;
replace often_museum`t'_m_tilde_hat = often_museum`t'_m_tilde_hat2 if often_museum`t'_m_tilde_hat==.;
drop often_museum`t'_m_tilde_hat2;
local inv_meas_in = "often_museum`t'_m_tilde_hat";






if `do_ols'==0{;
gen int_in = math9_m_tilde_hat*often_museum9_m_tilde_hat;
*Z instruments;
gen int1 = recog9_m * often_praised9_m;
gen int2 = comp9_m * often_praised9_m;
reg int_in recog9_m comp9_m often_praised9_m int1 int2 ;
predict int_in_hat if e(sample);
};

if `do_ols'==1{;
gen int_in = math9_m_tilde*often_museum9_m_tilde;
gen int_in_hat = int_in;
};

reg int_in  `controls_imputation';
predict int_in_hat2, xb;
replace int_in_hat = int_in_hat2 if int_in_hat==.;
local skill_inv_meas_in = "int_in_hat";

gen sample_`t' = ( math`t'!=. |  often_museum`t'!=.);

};


if `t' == 11 {;

*Y variable;
local skill_meas_out = "math13";
local skill_meas_out2 = "recog13";
local skill_meas_out3 = "comp13";

*X variables;
local skill_meas_in = "math11_m_tilde_hat";


if `do_ols'==0{;
reg  often_praised11_m_tilde often_museum`t'_m;
predict often_praised11_m_tilde_hat if e(sample);
};

if `do_ols'==1{;
gen often_praised11_m_tilde_hat = often_praised11_m_tilde;
};

reg often_praised11_m_tilde `controls_imputation';
predict often_praised11_m_tilde_hat2, xb;
replace often_praised11_m_tilde_hat = often_praised11_m_tilde_hat2 if often_praised11_m_tilde_hat==.;
drop often_praised11_m_tilde_hat2;
local inv_meas_in = "often_praised11_m_tilde_hat";


if `do_ols'==0{;
gen int_in = math11_m_tilde_hat*often_praised11_m_tilde_hat;
*Z instruments;
gen int1 = recog11_m * often_museum`t'_m;
gen int2 = comp11_m * often_museum`t'_m;
reg int_in recog11_m comp11_m often_museum`t'_m int1 int2 ;
predict int_in_hat if e(sample);
};

if `do_ols'==1{;
gen int_in = math11_m_tilde*often_praised11_m_tilde;
gen int_in_hat = int_in;
};

reg int_in  `controls_imputation';
predict int_in_hat2, xb;
replace int_in_hat = int_in_hat2 if int_in_hat==.;
local skill_inv_meas_in = "int_in_hat";


gen sample_`t' = ( math`t'!=. |  often_praised`t'!=.);


};


********************************************;
*estimate main part of production function;
********************************************;

reg `skill_meas_out'  `skill_meas_in' `inv_meas_in' `skill_inv_meas_in' if sample_`t'==1, r;
gen sample_`skill_meas_out' = e(sample);


predict predict_`skill_meas_out' if e(sample);
gen resid_`skill_meas_out' = `skill_meas_out' - predict_`skill_meas_out';

gen beta0 = _b[_cons];
gen beta1 = _b[`skill_meas_in'];
gen beta2 = _b[`inv_meas_in'];
gen beta3 = _b[`skill_inv_meas_in'];

if `restricted_model'==1 {;
gen lambda_`skill_meas_out' = beta1 + beta2 + beta3;
gen mu_`skill_meas_out' = _b[_cons];
gen `skill_meas_out'_m_tilde = (`skill_meas_out'_m - mu_`skill_meas_out') / lambda_`skill_meas_out';
gen `skill_meas_out'_tilde = (`skill_meas_out' - mu_`skill_meas_out') / lambda_`skill_meas_out';

};



forvalues j = 1(1)3 {;
gen gamma`j'_age`t' = beta`j' / lambda_`skill_meas_out';
};


gen gamma_return_scale_age`t' = gamma1_age`t' + gamma2_age`t' + gamma3_age`t';


***************;
*TFP;
***************;
if `restricted_model'==0 {;

*Compute TFP;

gen log_TFP_age`t' = (beta0 - mu_`skill_meas_out') / lambda_`skill_meas_out';

};

if `restricted_model'==1 {;

*No TFP;

gen log_TFP_age`t' = 0;

};

*************************;
*Compute lambdas t+2 for the other two measures of children skills;
*************************;

*Second measure;
*Notice that this is equivalent to Cov(M2,M3)/Cov(M3,M1_tilde) because of the IV formula;
ivregress 2sls `skill_meas_out2' (`skill_meas_out'_m_tilde = `skill_meas_out3') if sample_`t'==1, robust;

gen mu_`skill_meas_out2' = _b[_cons];
gen lambda_`skill_meas_out2'= _b[`skill_meas_out'_m_tilde];


gen `skill_meas_out2'_m_tilde = (`skill_meas_out2'_m - mu_`skill_meas_out2') / lambda_`skill_meas_out2';


*Third measure;
*Notice that this is equivalent to Cov(M3,M2)/Cov(M2,M1_tilde) because of the IV formula;
ivregress 2sls `skill_meas_out3' (`skill_meas_out'_m_tilde = `skill_meas_out2') if sample_`t'==1, robust;

gen mu_`skill_meas_out3' = _b[_cons];
gen lambda_`skill_meas_out3'= _b[`skill_meas_out'_m_tilde];


gen `skill_meas_out3'_m_tilde = (`skill_meas_out3'_m - mu_`skill_meas_out3') / lambda_`skill_meas_out3';


****************************;
*variance of shock;
****************************;

*Put the residual on the normalised scale by dividing it by lambda;
gen resid_`skill_meas_out'_tilde = resid_`skill_meas_out'/lambda_`skill_meas_out';
*Covariance between the rescaled residual and the normalised second measure, age sample only;
correlate resid_`skill_meas_out'_tilde `skill_meas_out2'_m_tilde if sample_`t'==1, covariance;
gen cov_prod_age`t' = r(cov_12);

*Standard deviation of the shock. sqrt() returns missing when the covariance is negative;
gen std_prod_age`t' = sqrt(cov_prod_age`t');

*Remove the per-age coefficient copies and every variable whose name starts with int;
*NOTE(review): the int variables are created outside this excerpt - confirm nothing else matches;
drop beta* int*  ;

}; *age loop;

*********************************;
*Estimate Anchoring Equation;
*********************************;
*Schooling;
*set output proc;

*Anchor 1: completed schooling on the normalised age-13 math measure;
*do_ols equal to 0: 2SLS with recog13 and comp13 as instruments for the math measure;
*do_ols equal to 1: OLS;
*ivregress 2sls replaces the superseded ivreg so that the call matches the other IV;
*regressions in this file. The 2SLS point estimates read below through _b are unchanged;
if `do_ols'==0 {;
    ivregress 2sls education (math13_m_tilde = recog13 comp13), robust;
};
else if `do_ols'==1 {;
    reg education math13_m_tilde , r;
};

*Fitted values on the estimation sample, intercept and slope of the schooling anchor;
predict predict_education if e(sample);
gen alpha_anchor0 = _b[_cons];
gen alpha_anchor1 = _b[math13_m_tilde];


*Variance of the anchoring shock, stored as the covariance of the outcome with its residual;
*NOTE(review): presumably the covariance with the outcome is used instead of the residual;
*variance so that measurement error in the regressor drops out - confirm;
gen resid_education = education - predict_education;
correlate education resid_education, covariance;
gen alpha_anchor2 = r(cov_12);


*Anchor 2: log wages of the child, same specification as the schooling anchor;
gen log_wage_child = log(wage_children);

if `do_ols'==0 {;
    ivregress 2sls log_wage_child (math13_m_tilde = recog13 comp13), robust;
};
else if `do_ols'==1 {;
    reg log_wage_child math13_m_tilde , r;
};

predict predict_wage if e(sample);

gen alpha_anchor0_wage = _b[_cons];
gen alpha_anchor1_wage = _b[math13_m_tilde];

*Variance of the wage anchoring shock, computed the same way as for schooling;
gen resid_wage = log_wage_child - predict_wage;
correlate log_wage_child resid_wage, covariance;
gen alpha_anchor2_wage = r(cov_12);





if `b'==0{;


*****************************************************************************;
*Figures in Appendix F: Figures F1-F4
*****************************************************************************;

*One histogram per PIAT measure (math, recog, comp) and age 5, 7, 9, 11, 13;
*Each graph is kept in memory under the name gr_ plus measure plus age and exported as eps;
*Export paths are quoted so that a paper directory containing spaces still works;
*name() uses replace so the section can be rerun in the same Stata session;
forvalues age = 5(2)13 {;

    *Ages 5 to 11 are filtered on the sample flag of the following wave (age + 2);
    *Age 13 is the last age and is filtered on sample_math13 itself;
    *age_tp1 is left at 13 after the loop, as before;
    local age_tp1 = min(`age' + 2, 13);

    hist math`age' if sample_math`age_tp1'==1 , title("PIAT MATH (Age `age')") graphregion(fcolor(white)) color(black) name(gr_math`age', replace);
    graph export "$paper_dir\math_distribution_`age'.eps", replace;

    hist recog`age' if sample_math`age_tp1'==1 , title("PIAT RECOG (Age `age')") graphregion(fcolor(white)) color(black) name(gr_recog`age', replace);
    graph export "$paper_dir\recog_distribution_`age'.eps", replace;

    hist comp`age' if sample_math`age_tp1'==1 , title("PIAT COMP (Age `age')") graphregion(fcolor(white)) color(black) name(gr_comp`age', replace);
    graph export "$paper_dir\comp_distribution_`age'.eps", replace;

};


*Combine the five age panels of each measure into one figure;
graph combine gr_math5 gr_math7 gr_math9 gr_math11 gr_math13, graphregion(fcolor(white)) rows(3);
graph export "$paper_dir\fig_f1.eps", replace;

graph combine gr_comp5 gr_comp7 gr_comp9 gr_comp11 gr_comp13, graphregion(fcolor(white)) rows(3);
graph export "$paper_dir\fig_f2.eps", replace;

graph combine gr_recog5 gr_recog7 gr_recog9 gr_recog11 gr_recog13, graphregion(fcolor(white)) rows(3);
graph export "$paper_dir\fig_f3.eps", replace;



******************************;
*Appendix B: Table B-8;
******************************;
*Summary statistics of the three PIAT measures at each age, printed with output switched on;
set output proc;
forvalues age = 5(2)13 {;

    *Ages 5 to 11 use the sample flag of the next wave. Age 13 uses sample_math13;
    local age_tp1 = min(`age' + 2, 13);

    sum math`age' recog`age' comp`age' if sample_math`age_tp1'==1;

};

*Back to showing errors only;
set output error;


*Pooled investment measures: take the measure of the earlier age for children in each sample;
*NOTE(review): the data appear to hold one row per child here, so a child flagged in several;
*samples keeps only the value written by the last replace - confirm this is intended;
gen often_mom_reads_sample = .;
replace often_mom_reads_sample = often_mom_reads5 if sample_math7==1;
replace often_mom_reads_sample = often_mom_reads7 if sample_math9==1;


*Museum visits are pooled over all four samples;
gen often_museum_sample = .;
replace often_museum_sample = often_museum5 if sample_math7==1;
replace often_museum_sample = often_museum7 if sample_math9==1;
replace often_museum_sample = often_museum9 if sample_math11==1;
replace often_museum_sample = often_museum11 if sample_math13==1;


*Praise is pooled over the two later samples only;
gen often_praised_sample = .;
replace often_praised_sample = often_praised9 if sample_math11==1;
replace often_praised_sample = often_praised11 if sample_math13==1;



*checking the number of missing obs for skills and investment measures by age;
*Each line summarises the measures of one age within the sample of the following wave;

sum math5 recog5 comp5 often_mom_reads5 often_museum5 sample_math7 if sample_math7==1;
sum math7 recog7 comp7 often_mom_reads7 often_museum7 sample_math9 if sample_math9==1;
sum math9 recog9 comp9 often_mom_reads9 often_praised9 often_museum9 sample_math11 if sample_math11==1;
sum math11 recog11 comp11 often_praised11 often_museum11 sample_math13 if sample_math13==1;


*********************;
*Appendix B: Table B-9;
*********************;
*Summary of the three pooled investment variables, printed with output switched on;
set output proc;
sum often_mom_reads_sample often_museum_sample often_praised_sample  ;
set output error;


*Work on a copy of the data. The matching restore follows the Table 1 output;
preserve;

keep math5 math7 math9 math11 math13  recog5 recog7 recog9 recog11 recog13 comp5 comp7 comp9 comp11 comp13 
sample_math7 sample_math9 sample_math11 sample_math13 child_sex child_race child_id_nlsy mom_id_nlsy 
faminc5  faminc7  faminc9  faminc11  faminc13  
mom_school5  mom_school7  mom_school9  mom_school11  mom_school13 
education wage_children
asvab2 asvab3 asvab4 asvab5 asvab6 asvab8 se1 se2 se3 se4 se5 se6 se8 se9 se10 rotter1 rotter2 rotter3 rotter4
often_mom_reads5 often_museum5
often_mom_reads7  often_museum7
often_museum9   often_praised9
often_praised11 often_museum11;

*One row per child and age. child_age is rebuilt from the numeric suffixes;
reshape long math@ recog@ comp@ sample_math@ faminc@ mom_school@ often_mom_reads@ often_museum@ often_praised@ , i(child_id_nlsy) j(child_age);

*NOTE(review): sample_math only exists for ages 7 to 13, so it is missing at age 5 after the;
*reshape and this filter drops every age-5 row - confirm this is intended;
keep if sample_math==1;

*Pooled distribution of each PIAT measure. Export paths are quoted so that a paper;
*directory containing spaces still works;
hist math if inlist(child_age, 5, 7, 9, 11, 13) , title("PIAT MATH") graphregion(fcolor(white)) color(black); 
graph export "$paper_dir\math_distribution_allages.eps", replace;

hist recog if inlist(child_age, 5, 7, 9, 11, 13) , title("PIAT RECOG") graphregion(fcolor(white)) color(black);
graph export "$paper_dir\recog_distribution_allages.eps", replace;

hist comp if inlist(child_age, 5, 7, 9, 11, 13) , title("PIAT COMP") graphregion(fcolor(white)) color(black);
graph export "$paper_dir\comp_distribution_allages.eps", replace;


***********************************************************;
*Table 1 (Sample Statistics) and Appendix Figures F1-F4;
***********************************************************;

*Categorical variables reported as percentage shares;
local list_var_stats = " child_sex child_race";

*Number of rows with a non-missing child identifier;
sum child_id_nlsy ;
gen n_obs = r(N);

*Number of distinct children: flag the first row of each child and count the flags;
bysort child_id_nlsy: gen temp_child = (_n == 1);
count if temp_child;
gen n_children = r(N);
drop temp_child;

*Number of distinct mothers, same approach;
bysort mom_id_nlsy: gen temp_mom = (_n == 1);
count if temp_mom;
gen n_mothers = r(N);
drop temp_mom;

*Share of each category in percent: build one dummy per category, store 100 times its mean;
*as a constant and drop the dummy again;
foreach cat_var of local list_var_stats {;

    tab `cat_var', gen(`cat_var');
    local n_rows = r(r);

    forvalues lvl = 1/`n_rows' {;
        sum `cat_var'`lvl';
        gen freq_`cat_var'_`lvl' = 100*r(mean);
        drop `cat_var'`lvl';
    };

};

*Family income rebuilt as 10000 times the exponential of faminc;
gen family_income = 10000*exp(faminc);

*Continuous variables reported with mean and standard deviation;
local list_var_stats2 = " mom_school family_income education wage_children";

foreach cont_var of local list_var_stats2 {;
    sum `cont_var';
    gen mean_`cont_var' = r(mean);
    gen std_`cont_var' = r(sd);
};

drop family_income;

*Reduce the data to a single row holding all statistics;
collapse (mean) n_obs n_mothers n_children freq_* mean_* std_*;

order n_obs n_mothers n_children freq_* mean_mom_school std_mom_school mean_family_income std_family_income mean_education std_education;



*Write Table 1 as a LaTeX tabular with the columns label, Mean and SD;
file open resultsfile using "$paper_dir\sample_stats.tex", write replace;
file write resultsfile "\begin{tabular}{lcc}" _n;
file write resultsfile "\hline \hline" _n;
file write resultsfile "\\[0.2cm]" _n;
file write resultsfile " & Mean & SD \\ " _n;
file write resultsfile "\hline" _n;
file write resultsfile "\\[0.2cm]" _n;

*Rows 1 to 8: counts and percentage shares, with the SD column left empty;
*The first three entries are counts and use an integer format, the rest use two decimals;
*Entries 9 to 14 are visited but produce no row here, their rows are written further below;
local i = 1;
foreach x in n_obs n_mothers n_children freq_child_sex_1 freq_child_sex_2 freq_child_race_1 freq_child_race_2 freq_child_race_3 mean_mom_school std_mom_school mean_family_income std_family_income mean_education std_education {;

    quietly summarize `x';

    local num_fmt "%6.2f";
    if `i' < 4 local num_fmt "%9.0f";
    local column1:  di `num_fmt' r(mean) ;

    if `i'==1 {;
        file write resultsfile "   N Obs            & `column1'  &  \\[0.2cm] " _n;
    };
    else if `i'==2 {;
        file write resultsfile "   N of Mothers            & `column1'  &  \\[0.2cm] " _n;
    };
    else if `i'==3 {;
        file write resultsfile "   N of Children            & `column1'  &  \\[0.2cm] " _n;
    };
    else if `i'==4 {;
        file write resultsfile "   \% Male Children            & `column1'  &  \\[0.2cm] " _n;
    };
    else if `i'==5 {;
        file write resultsfile "   \% Female Children             & `column1'  &  \\[0.2cm] " _n;
    };
    else if `i'==6 {;
        file write resultsfile "   \% Hispanic Children             & `column1'  &  \\[0.2cm] " _n;
    };
    else if `i'==7 {;
        file write resultsfile "   \% Black Children             & `column1'  &  \\[0.2cm] " _n;
    };
    else if `i'==8 {;
        file write resultsfile "   \% Other Races             & `column1'  &  \\[0.2cm] " _n;
    };

    local ++i;
};


*Rows with both a mean and a standard deviation;
quietly summarize mean_mom_school;
local column1:  di %6.2f r(mean) ;
quietly summarize std_mom_school;
local column2:  di %6.2f r(mean) ;
file write resultsfile "   Mom Education             & `column1'  &  `column2'   \\[0.2cm] " _n;

quietly summarize mean_family_income;
local column1:  di %9.2f r(mean) ;
quietly summarize std_family_income;
local column2:  di %9.2f r(mean) ;
file write resultsfile "   Family Income             & `column1'  &  `column2'   \\[0.2cm] " _n;

quietly summarize mean_education;
local column1:  di %6.2f r(mean) ;
quietly summarize std_education;
local column2:  di %6.2f r(mean) ;
file write resultsfile "  Children Final Years of Education             & `column1'  &  `column2'   \\[0.2cm] " _n;


file write resultsfile "\hline \hline" _n;
file write resultsfile "\end{tabular}";
file close resultsfile;


*Bring back the data that were in memory before the preserve;
restore;


******************************;
*Appendix B: Table B-10;
******************************;
*Summary statistics of the asvab, se and rotter items, one row per mother;
preserve;

*Items summarised in the table;
local tab_b10_vars "asvab2 asvab3 asvab4 asvab5 asvab6 asvab8 se1 se2 se3 se4 se5 se6 se8 se9 se10 rotter1 rotter2 rotter3 rotter4";

keep sample_math7 sample_math9 sample_math11 sample_math13 child_id_nlsy mom_id_nlsy `tab_b10_vars';

*One row per child and age, restricted to rows flagged as in sample;
reshape long sample_math@ , i(child_id_nlsy) j(child_age);
keep if sample_math==1;

*Average the items within mother so that each mother counts once;
collapse (mean) `tab_b10_vars' , by(mom_id_nlsy);

set output proc;
sum `tab_b10_vars';
set output error;

restore;



};



*******************************************************************************;
*Compute statistics for children and mother measures;
*******************************************************************************;

*Childhood skill measures at all ages;
local all_cogn_childhood = "math5 recog5 comp5 math7 recog7 comp7 math9 recog9 comp9
math11 recog11 comp11 math13 recog13 comp13 ";

*For every measure store its mean, variance, minimum, maximum and number of distinct values;
*as constant variables. Each pair below gives the name prefix and the macro holding the list;
*The three groups are processed in the same order as before;
foreach grp in "mc mom_cog_measures" "mnc mom_noncog_measures" "cogn all_cogn_childhood" {;

    local grp_tag : word 1 of `grp';
    local grp_list : word 2 of `grp';

    foreach x in ``grp_list'' {;

        sum `x';

        gen mean_`grp_tag'_`x' = r(mean);
        gen var_`grp_tag'_`x' = r(Var);

        gen min_`grp_tag'_`x' = r(min);
        gen max_`grp_tag'_`x' = r(max);

        *Flag the first row of each distinct value and count the flags;
        *Missing values form a group of their own and are counted as one value;
        bysort `x' : gen `x'nvals = (_n == 1);
        count if `x'nvals;

        gen nvalues_`grp_tag'_`x' = r(N);

        drop `x'nvals;

    };

};


*Investment measures: only the variance is stored;
local list_inv = "often_mom_reads5 often_museum5 often_mom_reads7 
often_museum7 often_museum9 often_praised9 often_praised11 often_museum11";

foreach x of local list_inv {;
    sum `x';
    gen var_inv_`x' = r(Var);
};



*******************************************************************************;
*Collect results;
*******************************************************************************;

*The estimates are stored as constants, so the first row carries all of them;
keep in 1;

*Keep only the parameter and summary-statistic variables;
keep
    alpha* gamma* std_inv_age* std_prod_age* log_TFP_age*
    var_faminc* var_mom* var_child* cov_mom* corr_mom* cov_child* corr_child*
    mean_income_age5 inc_beta_const inc_beta_tm2 income_shock_std
    mu_asvab* mu_se* mu_rotter*
    lambda_asvab* lambda_se* lambda_rotter*
    mu_math* mu_recog* mu_comp*
    lambda_math* lambda_recog* lambda_comp*
    mean_* var_*
    min_* max_* nvalues_*
    lambda_often_* mu_often_*
    var_inv_*
;

*Placeholders for the mu and lambda of investment measures that are not used at these ages;
*They only exist so that a later reshape by age is balanced;
foreach par in mu lambda {;
    foreach unused in mom_reads9 mom_reads11 praised5 praised7 {;
        gen `par'_often_`unused' = .;
    };
};

*Identifier of the bootstrap replication;
gen boot_sample = `b';


*Add the rows collected in earlier replications below the current one and write the file back;
*NOTE(review): temp_collect.dta must already exist in the working directory on the first;
*pass - presumably it is initialised before this excerpt, confirm;
append using temp_collect.dta;
save temp_collect.dta, replace;

}; *bootstrap loop;

}; *do here bootstrap loop;


*Tag the data in memory with the estimator settings used for this run;
gen do_ols = `do_ols';
gen restricted_model = `restricted_model';


*Save in the scratch directory under a file name that encodes both settings;
cd "$scratch_dir";
save boot_collect_ols`do_ols'_restricted`restricted_model'_new.dta, replace;

